###############################################
### Replication Code for Appendix F.3       ###
### Title: Self-Reported Political Ideology ###
### Authors: Eddy S. F. Yeung, Kai Quek     ###
### Version: November 21, 2023              ###
###############################################

### Set-up ----
## Clean the R environment and set the working directory
# NOTE(review): rm(list = ls()) deletes the (non-hidden) objects in the global
# environment; it does not detach packages or reset options. setwd() hard-codes
# a path under the user's home directory -- point it at the folder that holds
# "survey_data.csv" (read with a relative path below) before running the script.
rm(list = ls())
setwd("~/Desktop/ideology-measure/replication")

## Load the required packages
library(tidyverse) # version 1.3.1
library(estimatr)  # version 1.0.0
library(bootcorci) # version 0.0.0.9000
library(cowplot)   # version 1.1.1
library(texreg)    # version 1.37.5
library(grid)      # version 4.0.1
library(gridExtra) # version 2.3
library(extrafont) # version 0.17

## Read the raw survey file and report its number of rows
df <- read.csv(file = "survey_data.csv")
nrow(df)

## Keep only respondents with citizen == 1 (drops non-American respondents)
## and report the remaining number of rows
df <- dplyr::filter(df, citizen == 1)
nrow(df)

### Recode sociodemographic variables ----
## Party identification (1 = strong Democrat; 7 = strong Republican)
# pid1 == 1 is combined with the follow-up pid2d (codes 1-2), pid1 == 2 with
# the follow-up pid2r (codes 6-7), and pid1 in {3, 4} with the follow-up
# pid2n (codes 3-5). Any other combination is left as NA by case_when().
df <- mutate(
  df,
  pid = case_when(
    pid1 == 1 & pid2d == 1 ~ 1,
    pid1 == 1 & pid2d == 2 ~ 2,
    pid1 %in% c(3, 4) & pid2n == 2 ~ 3,
    pid1 %in% c(3, 4) & pid2n == 3 ~ 4,
    pid1 %in% c(3, 4) & pid2n == 1 ~ 5,
    pid1 == 2 & pid2r == 2 ~ 6,
    pid1 == 2 & pid2r == 1 ~ 7
  )
)

## Partisan dummies derived from the 7-point scale
# as.numeric() on the logical test yields 1 / 0 and keeps NA where pid is NA
df <- mutate(
  df,
  dem = as.numeric(pid >= 1 & pid <= 3),   # (1 = Democrat, incl. pid == 3)
  gop = as.numeric(pid >= 5 & pid <= 7),   # (1 = Republican, incl. pid == 5)
  indep = as.numeric(pid == 4)             # (1 = independent)
)

## Race, gender, education, political knowledge, age, and income
# Each dummy is as.numeric() of a logical test, so it is 1 / 0 and stays NA
# wherever the underlying survey item is NA.
df <- mutate(
  df,
  # Race (1 = Black)
  black = as.numeric(racial == 2),
  # Gender (1 = female)
  female = as.numeric(gender == 2),
  # Education (1 = college graduate)
  college = as.numeric(edu >= 5),
  # Political knowledge items: 1 when the response equals the keyed answer
  pol_correct1 = as.numeric(know1 == 1),
  pol_correct2 = as.numeric(know2 == 4),
  pol_correct3 = as.numeric(know3 == 2),
  pol_correct4 = as.numeric(know4 == 2),
  # Political knowledge (0 = least knowledgeable; 4 = most knowledgeable)
  pol_know = pol_correct1 + pol_correct2 + pol_correct3 + pol_correct4,
  # (1 = politically sophisticated, i.e. at least 3 of 4 items correct)
  sophis = as.numeric(pol_know >= 3),
  # Age
  # NOTE(review): presumably yob is stored as a coded index rather than a
  # calendar year, so that adding 11 yields age -- confirm against the codebook
  age = yob + 11,
  # Income (0 = lowest; 16 = highest); the code 88 is set to missing
  income = ifelse(income == 88, NA, income - 1)
)

### Recode ideology variables ----
## Political ideology (1 = extremely liberal; 7 = extremely conservative)
# ideo and ideo2 take the same source item for randomizer 1-3; they differ
# only for randomizer == 4, where ideo reads leftright4a and ideo2 reads
# leftright4b.
df <- mutate(
  df,
  ideo = case_when(
    randomizer %in% c(1, 2) ~ leftright1,
    randomizer == 3 ~ leftright3,
    randomizer == 4 ~ leftright4a
  ),
  ideo2 = case_when(
    randomizer %in% c(1, 2) ~ leftright1,
    randomizer == 3 ~ leftright3,
    randomizer == 4 ~ leftright4b
  )
)

## Self-identification dummies, all based on ideo2
df <- mutate(
  df,
  liberal = as.numeric(ideo2 >= 1 & ideo2 <= 3),   # (1 = self-id liberal)
  conserv = as.numeric(ideo2 >= 5 & ideo2 <= 7),   # (1 = self-id conservative)
  moderate = as.numeric(ideo2 == 4)                # (1 = self-id moderate)
)

## Ideological knowledge (0 = least knowledgeable; 4 = most knowledgeable)
# Items 1, 2, 3 and 5 are scored; each is 1 when the response equals the
# keyed answer, and the index is their sum (NA if any item is NA).
df <- mutate(
  df,
  ideo_correct1 = as.numeric(lib1 == 1),
  ideo_correct2 = as.numeric(lib2 == 2),
  ideo_correct3 = as.numeric(lib3 == 2),
  ideo_correct5 = as.numeric(lib5 == 1),
  ideo_know = ideo_correct1 + ideo_correct2 + ideo_correct3 + ideo_correct5
)
table(df$ideo_know)

### Define experimental groups ----
## Collapse the four randomizer arms into three conditions, stored as a factor
df$group <- as.factor(case_when(
  df$randomizer %in% c(1, 2) ~ 0,   # control group
  df$randomizer == 3 ~ 1,           # Add Definitions condition
  df$randomizer == 4 ~ 2            # Subtract Labels condition
))

## One data frame per condition (rows with a missing group are excluded)
df0 <- df[which(df$group == 0), , drop = FALSE] # control group
df1 <- df[which(df$group == 1), , drop = FALSE] # Add Definitions condition
df2 <- df[which(df$group == 2), , drop = FALSE] # Subtract Labels condition

### Table S3 ----
## Percentage of correct responses to each ideological-knowledge question
# Each row holds the point estimate and the confidence interval returned by
# prop.test() without continuity correction (default 95% level).
# Q1: The government should play an active role in supporting social and political change
# Q2: Social institutions and the free market solve problems better than governments do
# Q3: A powerful government is a threat to citizens' freedom
# Q5: The government should play a strong role in the economy and the provision of
#     social services (named Q4 but not Q5 in Table S3)
correct_items <- c("ideo_correct1", "ideo_correct2", "ideo_correct3", "ideo_correct5")

## Create an empty data frame to store the results first
summary_knowledge <- data.frame(matrix(NA, nrow = 4, ncol = 4))
colnames(summary_knowledge) <- c("question", "estimate", "lwr", "upr")
summary_knowledge[, 1] <- seq_len(4)

## Fill one row per question
# The numerator and the denominator are taken from the same item column, so
# the fourth row reports question 5 rather than repeating the question 1 count.
for (i in seq_along(correct_items)) {
  responses <- df[[correct_items[i]]]
  temp <- prop.test(x = sum(responses, na.rm = TRUE),
                    n = sum(!is.na(responses)),
                    correct = FALSE)
  summary_knowledge[i, 2:4] <- c(temp$estimate, temp$conf.int[1], temp$conf.int[2])
}

## Show the summary table
# Q5 is renamed as Q4 in Table S3
summary_knowledge

## Identify DK responses for each question (response code 4)
df$ideo_DK1 <- ifelse(df$lib1 == 4, 1, 0)
df$ideo_DK2 <- ifelse(df$lib2 == 4, 1, 0)
df$ideo_DK3 <- ifelse(df$lib3 == 4, 1, 0)
df$ideo_DK5 <- ifelse(df$lib5 == 4, 1, 0)
dk_items <- c("ideo_DK1", "ideo_DK2", "ideo_DK3", "ideo_DK5")

## Create an empty data frame to store the results first
summary_knowledge_DK <- data.frame(matrix(NA, nrow = 4, ncol = 4))
colnames(summary_knowledge_DK) <- c("question", "estimate", "lwr", "upr")
summary_knowledge_DK[, 1] <- seq_len(4)

## Percentage of DKs for questions 1, 2, 3 and 5 (same estimator as above)
for (i in seq_along(dk_items)) {
  responses <- df[[dk_items[i]]]
  temp <- prop.test(x = sum(responses, na.rm = TRUE),
                    n = sum(!is.na(responses)),
                    correct = FALSE)
  summary_knowledge_DK[i, 2:4] <- c(temp$estimate, temp$conf.int[1], temp$conf.int[2])
}

## Show the summary table
# The original Q4 (free trade item) is not reported in Table S3
# Q5 is renamed as Q4 in Table S3
summary_knowledge_DK

### Figure S11 ----
## Percentage of correct responses to questions 1, 2, 3 and 5, computed
## separately for self-reported liberals, conservatives, and moderates
# Result table: three consecutive rows per question, in the order
# Liberal, Conservative, Moderate
summary_know <- data.frame(matrix(NA, nrow = 12, ncol = 5))
colnames(summary_know) <- c("question", "estimate", "lwr", "upr", "Self-Reported Ideology")
summary_know[, 1] <- rep(c(1, 2, 3, 5), each = 3)
summary_know[, 5] <- rep(c("Liberal", "Conservative", "Moderate"), times = 4)

## Fill the table: outer loop over questions, inner loop over ideology dummies
# The inner order (liberal, conserv, moderate) must match the row labels above.
know_items <- c("ideo_correct1", "ideo_correct2", "ideo_correct3", "ideo_correct5")
ideo_flags <- c("liberal", "conserv", "moderate")
for (item_idx in seq_along(know_items)) {
  for (flag_idx in seq_along(ideo_flags)) {
    in_subgroup <- df[[ideo_flags[flag_idx]]] == 1
    answers <- df[[know_items[item_idx]]][in_subgroup]
    # Missing answers count neither towards successes nor towards trials
    temp <- prop.test(x = sum(answers, na.rm = TRUE),
                      n = sum(!is.na(answers)),
                      correct = FALSE)
    summary_know[3 * (item_idx - 1) + flag_idx, 2:4] <-
      c(temp$estimate, temp$conf.int[1], temp$conf.int[2])
  }
}

## Visualize the results
# Order the legend as Liberal, Moderate, Conservative and label the questions
# (question 5 is displayed as Q4)
summary_know[["Self-Reported Ideology"]] <-
  factor(summary_know[["Self-Reported Ideology"]],
         levels = c("Liberal", "Moderate", "Conservative"))
summary_know[["question"]] <-
  factor(summary_know[["question"]],
         levels = c(1, 2, 3, 5),
         labels = c("Q1: Gov't Should Be Active\nin Supporting Change\n[Liberal]",
                    "Q2: Social Inst. and\nFree Market Are Better\n[Conservative]",
                    "Q3: Powerful Gov't Is\na Threat to Freedom\n[Conservative]",
                    "Q4: Gov't Should Be Strong\nin Economy and Services\n[Liberal]"))

# Convert proportions to percentages
summary_know[c("estimate", "lwr", "upr")] <-
  summary_know[c("estimate", "lwr", "upr")] * 100

ideology_greys <- c("grey50", "grey70", "grey90")
ideo_correct <-
  ggplot(summary_know,
         aes(x = question, y = estimate,
             color = `Self-Reported Ideology`,
             fill = `Self-Reported Ideology`)) +
  # Bars first, error bars on top; both dodged by the same width
  geom_bar(stat = "identity", position = position_dodge(.9), color = "black") +
  geom_errorbar(aes(ymin = lwr, ymax = upr),
                position = position_dodge(.9), color = "black",
                width = .1, linewidth = .5) +
  scale_color_manual(values = ideology_greys) +
  scale_fill_manual(values = ideology_greys) +
  labs(x = "", y = "Percentage of Correct Responses (%)") +
  coord_cartesian(ylim = c(0, 100)) +
  theme_bw() +
  theme(text = element_text(color = "black", size = 12, family = "Times"),
        axis.text = element_text(color = "black", family = "Times", size = 11),
        # Legend anchored to the top-right corner of the panel
        legend.justification = c(1, 1),
        legend.position = c(1, 1),
        legend.background = element_blank(),
        legend.box.background = element_rect(color = "black"),
        legend.key.size = unit(1.5, "line"),
        legend.key.height = unit(0, "cm"))
ideo_correct

### Figure S12 ----
## Create a variable indicating a respondent's no. of correct answers to con. questions
df$con_knowledge <- df$ideo_correct2 + df$ideo_correct3

## Create a variable indicating a respondent's no. of correct answers to lib. questions
df$lib_knowledge <- df$ideo_correct1 + df$ideo_correct5

## Create an empty data frame to store the results
# Rows 1-2: "Liberal"-keyed questions; rows 3-4: "Conservative"-keyed questions;
# within each pair the first row is liberals and the second is conservatives
summary_know2 <- data.frame(matrix(NA, nrow = 4, ncol = 5))
colnames(summary_know2) <- c("group", "estimate", "lwr", "upr", "Self-Reported Ideology")
summary_know2[1:2, 1] <- "Questions with \"Liberal\"\nas the Correct Answer"
summary_know2[3:4, 1] <- "Questions with \"Conservative\"\nas the Correct Answer"
summary_know2[1:4, 5] <- c("Liberal", "Conservative", "Liberal", "Conservative")

## Obtain summary statistics for each group
df$con_lib <- ifelse(df$conserv == 1, 1, ifelse(df$liberal == 1, 0, NA)) # (1 = con.; 0 = lib.)

# These two counts each describe a single ideology group only (conservatives
# for n_con_know, liberals for n_lib_know). They are still defined in case
# code outside this section reads them, but the intervals below no longer use
# them: every mean is paired with the non-missing count of its own group.
n_con_know <- sum(!is.na(df$con_knowledge[df$con_lib == 1]))
n_lib_know <- sum(!is.na(df$lib_knowledge[df$con_lib == 0]))

# Mean, SD, and non-missing n per ideology group, then t-based 95% CIs
temp <- df %>%
  group_by(con_lib) %>%
  summarize(mean_con_know = mean(con_knowledge, na.rm = TRUE),
            sd_con_know = sd(con_knowledge, na.rm = TRUE),
            n_con = sum(!is.na(con_knowledge)),
            mean_lib_know = mean(lib_knowledge, na.rm = TRUE),
            sd_lib_know = sd(lib_knowledge, na.rm = TRUE),
            n_lib = sum(!is.na(lib_knowledge))) %>%
  mutate(se_con_know = sd_con_know / sqrt(n_con),
         lwr_ci_con_know = mean_con_know -
           qt(1 - (.05/2), n_con - 1) * se_con_know,
         upr_ci_con_know = mean_con_know +
           qt(1 - (.05/2), n_con - 1) * se_con_know,
         se_lib_know = sd_lib_know / sqrt(n_lib),
         lwr_ci_lib_know = mean_lib_know -
           qt(1 - (.05/2), n_lib - 1) * se_lib_know,
         upr_ci_lib_know = mean_lib_know +
           qt(1 - (.05/2), n_lib - 1) * se_lib_know)

# Select rows by the value of con_lib instead of by position, so the extra
# row that group_by() creates for con_lib == NA can never be picked up
lib_row <- which(temp$con_lib == 0)
con_row <- which(temp$con_lib == 1)
summary_know2[3, c(2:4)] <- c(temp$mean_con_know[lib_row], temp$lwr_ci_con_know[lib_row],
                              temp$upr_ci_con_know[lib_row])
summary_know2[1, c(2:4)] <- c(temp$mean_lib_know[lib_row], temp$lwr_ci_lib_know[lib_row],
                              temp$upr_ci_lib_know[lib_row])
summary_know2[4, c(2:4)] <- c(temp$mean_con_know[con_row], temp$lwr_ci_con_know[con_row],
                              temp$upr_ci_con_know[con_row])
summary_know2[2, c(2:4)] <- c(temp$mean_lib_know[con_row], temp$lwr_ci_lib_know[con_row],
                              temp$upr_ci_lib_know[con_row])

## Plot the graph
# Fix the legend / dodge order: liberals first, conservatives second
summary_know2[["Self-Reported Ideology"]] <-
  factor(summary_know2[["Self-Reported Ideology"]],
         levels = c("Liberal", "Conservative"))

two_greys <- c("grey50", "grey90")
ideo_con_lib <-
  ggplot(summary_know2,
         aes(x = group, y = estimate,
             color = `Self-Reported Ideology`,
             fill = `Self-Reported Ideology`)) +
  # Bars first, error bars on top; both dodged by the same width
  geom_bar(stat = "identity", position = position_dodge(.9), color = "black") +
  geom_errorbar(aes(ymin = lwr, ymax = upr),
                position = position_dodge(.9), color = "black",
                width = .1, linewidth = .5) +
  scale_color_manual(values = two_greys) +
  scale_fill_manual(values = two_greys) +
  labs(x = "", y = "Average Number of Correct Responses") +
  # Each knowledge score is a sum of two 0/1 items, hence the 0-2 axis
  coord_cartesian(ylim = c(0, 2)) +
  theme_bw() +
  theme(text = element_text(color = "black", size = 12, family = "Times"),
        axis.text = element_text(color = "black", family = "Times", size = 11),
        # Legend anchored to the top-right corner of the panel
        legend.justification = c(1, 1),
        legend.position = c(1, 1),
        legend.background = element_blank(),
        legend.box.background = element_rect(color = "black"),
        legend.key.size = unit(1.5, "line"),
        legend.key.height = unit(0, "cm"))
ideo_con_lib

### Figure S13 ----
## Helper: distribution of ideological knowledge within one subgroup
# dat         - subset of df for the subgroup
# panel_title - title shown above the panel (a plotmath expression)
# Bars show the share of the subgroup at each knowledge score, with scores of
# 3 or more shaded darker; the dashed blue line marks the subgroup mean.
plot_ideo_know_dist <- function(dat, panel_title) {
  ggplot(dat, aes(x = ideo_know, fill = ideo_know >= 3)) +
    geom_bar(aes(y = after_stat(count) / sum(after_stat(count))),
             color = "black", na.rm = TRUE) +
    geom_vline(xintercept = mean(dat$ideo_know, na.rm = TRUE),
               linetype = "longdash", color = "blue", size = 1) +
    scale_fill_manual(values = c("grey80", "grey20")) +
    scale_y_continuous(labels = scales::percent) +
    ggtitle(panel_title) +
    labs(x = "", y = "") +
    coord_cartesian(ylim = c(0, .35)) +
    theme_bw() +
    theme(text = element_text(family = "Times", size = 12),
          axis.text = element_text(family = "Times", size = 12),
          legend.position = "none")
}

## Partisanship
table(df$dem, df$ideo_know)
p1 <- plot_ideo_know_dist(
  subset(df, dem == 1),
  expression(paste("Democrats (", italic("n"), " = 1,249)"))
)

table(df$gop, df$ideo_know)
p2 <- plot_ideo_know_dist(
  subset(df, gop == 1),
  expression(paste("Republicans (", italic("n"), " = 1,014)"))
)

## Race
table(df$black, df$ideo_know)
p3 <- plot_ideo_know_dist(
  subset(df, black == 1),
  expression(paste("Blacks (", italic("n"), " = 394)"))
)
p4 <- plot_ideo_know_dist(
  subset(df, black == 0),
  expression(paste("Non-Blacks (", italic("n"), " = 2,380)"))
)

## Sex
table(df$female, df$ideo_know)
p5 <- plot_ideo_know_dist(
  subset(df, female == 1),
  expression(paste("Female (", italic("n"), " = 1,464)"))
)
p6 <- plot_ideo_know_dist(
  subset(df, female == 0),
  expression(paste("Male (", italic("n"), " = 1,310)"))
)

## Political knowledge
table(df$sophis, df$ideo_know)
p7 <- plot_ideo_know_dist(
  subset(df, sophis == 1),
  expression(paste("Politically Sophisticated (", italic("n"), " = 1,137)"))
)
p8 <- plot_ideo_know_dist(
  subset(df, sophis == 0),
  expression(paste("Not Politically Sophisticated (", italic("n"), " = 1,620)"))
)

## Combine into one graph (two columns, panels labelled A-H) and add a shared
## x-axis caption underneath
knowledge_by_group <- plot_grid(
  p1, p2, p3, p4, p5, p6, p7, p8,
  ncol = 2, labels = "AUTO", label_fontfamily = "Times"
)
knowledge_by_group <- ggdraw(
  add_sub(
    knowledge_by_group,
    "Ideological Knowledge (0 = least knowledgeable, 4 = most knowledgeable)",
    fontfamily = "Times",
    x = .52, y = 6, vjust = 4.5,
    vpadding = grid::unit(0, "lines")
  )
)
knowledge_by_group

### Figure S14 ----
## General distributions
# Count respondents per condition and scale point (valid answers 1-7 only);
# summarize() leaves the result grouped by condition, so freq is the share
# within each condition
ideo_by_group <- df %>%
  filter(ideo >= 1, ideo <= 7) %>%
  group_by(group, ideo) %>%
  summarize(count = n()) %>%
  mutate(freq = formattable::percent(count / sum(count)))
ideo_by_group$group <- factor(
  ideo_by_group$group,
  levels = c(0, 1, 2),
  labels = c("ANES Measure", "Add Definitions", "Subtract Labels")
)

## Helper: control condition vs. one treatment condition
# panel_data  - rows of ideo_by_group for the two conditions being compared
# group_means - one row per condition with the mean of ideo in column xvalue
# accent      - fill / line colour used for the treatment condition
# panel_title - title shown above the panel
# The control mean is drawn as a dotted black line and the treatment mean as
# a long-dashed line in the accent colour.
plot_measure_contrast <- function(panel_data, group_means, accent, panel_title) {
  ggplot(data = panel_data,
         aes(x = ideo, y = freq * 100, color = group, fill = group)) +
    geom_bar(stat = "identity", position = position_dodge(.9),
             color = "black", alpha = 0.8) +
    geom_vline(data = group_means,
               aes(xintercept = xvalue), color = c("black", accent),
               linetype = c("dotted", "longdash"), size = 0.5, show.legend = FALSE) +
    scale_color_manual(values = c("grey80", accent)) +
    scale_fill_manual("Experimental Condition", values = c("grey80", accent)) +
    ggtitle(panel_title) +
    labs(x = "Self-Reported Ideology", y = "Percentage of Respondents (%)") +
    coord_cartesian(ylim = c(0, 50)) +
    theme_bw() +
    theme(text = element_text(color = "black", size = 12, family = "Times"),
          axis.text = element_text(color = "black", family = "Times", size = 11),
          # Legend anchored to the top-right corner of the panel
          legend.justification = c(1, 1),
          legend.position = c(1, 1),
          legend.background = element_rect(fill = "white", color = "black"),
          legend.box.background = element_rect(color = "black"),
          legend.key.size = unit(1.5, "line"),
          legend.key.height = unit(0, "cm"))
}

## Standard ANES measure vs. measure with definitions
group_mean_temp1 <- df %>%
  filter(group %in% c(0, 1)) %>%
  group_by(group) %>%
  summarize(xvalue = mean(ideo, na.rm = TRUE))
p1 <- plot_measure_contrast(
  panel_data = subset(ideo_by_group, group != "Subtract Labels"),
  group_means = group_mean_temp1,
  accent = "#0072B2",
  panel_title = "Standard ANES Measure vs. Measure with Definitions"
)

## Standard ANES measure vs. label-free measure
group_mean_temp2 <- df %>%
  filter(group %in% c(0, 2)) %>%
  group_by(group) %>%
  summarize(xvalue = mean(ideo, na.rm = TRUE))
p2 <- plot_measure_contrast(
  panel_data = subset(ideo_by_group, group != "Add Definitions"),
  group_means = group_mean_temp2,
  accent = "#D55E00",
  panel_title = "Standard ANES Measure vs. Label-Free Measure"
)

## Combine into one graph (side by side, panels labelled A and B)
distribution <- plot_grid(p1, p2, ncol = 2,
                          labels = "AUTO", label_fontfamily = "Times")
distribution

### Table S4 ----
## Regression analysis
# OLS of self-reported ideology on the experimental condition, estimated with
# lm_robust(). Interactions are spelled out as explicit `:` terms and listed
# after the main effects, so the coefficient order matches the labels that
# are passed to texreg() below.
# mod0: treatment only
mod0 <- lm_robust(formula = ideo ~ group, data = df)
# mod1: treatment + covariates
mod1 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis,
  data = df
)
# mod2: + treatment x partisanship
mod2 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis +
    group:gop + group:dem,
  data = df
)
# mod3: + treatment x race
mod3 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis +
    group:black,
  data = df
)
# mod4: + treatment x gender
mod4 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis +
    group:female,
  data = df
)
# mod5: + treatment x political sophistication
mod5 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis +
    group:sophis,
  data = df
)
# mod6: treatment interacted with every covariate
mod6 <- lm_robust(
  formula = ideo ~ group + gop + dem + black + female + sophis +
    group:gop + group:dem + group:black + group:female + group:sophis,
  data = df
)

## Print the LaTeX table (standard errors instead of confidence intervals)
texreg(
  list(mod0, mod1, mod2, mod3, mod4, mod5, mod6),
  custom.header = list("Dependent Variable: Self-Reported Ideology" = 1:7),
  custom.coef.names = c(
    "Constant",
    "Add Definitions", "Subtract Labels",
    "Republican", "Democrat", "Black", "Female",
    "Politically Sophisticated (PS)",
    "Add Definitions × Republican", "Subtract Labels × Republican",
    "Add Definitions × Democrat", "Subtract Labels × Democrat",
    "Add Definitions × Black", "Subtract Labels × Black",
    "Add Definitions × Female", "Subtract Labels × Female",
    "Add Definitions × PS", "Subtract Labels × PS"
  ),
  custom.note = "Entries are OLS estimates with robust standard errors in parentheses.",
  caption = "Self-Reported Ideologies in Different Social Groups Are Changed by Question Wording (Speeders Included)",
  stars = 0,
  include.ci = FALSE,
  fontsize = "small"
)

### Figure S15 ----
## Construct the variable that indicates within-subjects differences in ideology
df$ideo_change <- df$ideo - df$ideo2

## Distribution of within-subjects differences in ideology by partisanship
# Democrats
p1 <- ggplot(subset(df, group == 2 & dem == 1), aes(x = ideo_change)) + 
  geom_bar(color = "black", fill = "grey80", na.rm = T) +
  theme_bw() + 
  ggtitle("Democrats") +
  xlab("") +
  ylab("") +
  coord_cartesian(xlim = c(-6.5, 6.5), ylim = c(0, 155)) +
  theme(text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 12),
        legend.position = "none")

# Independents
p2 <- ggplot(subset(df, group == 2 & dem == 0 & gop == 0), aes(x = ideo_change)) + 
  geom_bar(color = "black", fill = "grey80", na.rm = T) +
  theme_bw() + 
  ggtitle("Independents") +
  xlab("") +
  ylab("") +
  coord_cartesian(xlim = c(-6.5, 6.5), ylim = c(0, 155)) +
  theme(text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 12),
        legend.position = "none")

# Republicans
p3 <- ggplot(subset(df, group == 2 & gop == 1), aes(x = ideo_change)) + 
  geom_bar(color = "black", fill = "grey80", na.rm = T) +
  theme_bw() + 
  ggtitle("Republicans") +
  xlab("") +
  ylab("") +
  coord_cartesian(xlim = c(-6.5, 6.5), ylim = c(0, 155)) +
  theme(text = element_text(family = "Times", size = 12),
        axis.text = element_text(family = "Times", size = 12),
        legend.position = "none")

## Add vertical lines that show subgroup means
p1 <- p1 + 
  geom_vline(xintercept = mean(subset(df, group == 2 & dem == 1)$ideo_change, na.rm = T), 
             linetype = "longdash", color = "blue", size = 1)
p2 <- p2 + 
  geom_vline(xintercept = mean(subset(df, group == 2 & dem == 0 & gop == 0)$ideo_change, na.rm = T), 
             linetype = "longdash", color = "blue", size = 1)
p3 <- p3 + 
  geom_vline(xintercept = mean(subset(df, group == 2 & gop == 1)$ideo_change, na.rm = T), 
             linetype = "longdash", color = "blue", size = 1)

## Add annotations (left arrow = more liberal; right arrow = more conservative)
## The arrows and captions are identical in all three panels, so they are built
## once and added to each plot. annotate() is used instead of
## geom_segment(aes(...)) with constant coordinates: a geom layer inherits the
## plot's data and draws one copy of the arrow per data row, whereas annotate()
## draws each arrow exactly once.
direction_arrow <- function(x, xend) {
  annotate("segment", x = x, xend = xend, y = 150, yend = 150, 
           arrow = arrow(length = unit(0.15, "cm")), 
           size = .25, color = "grey50")
}
direction_caption <- function(x, label) {
  annotate("text", x = x, y = 155, label = label, 
           size = 2.5, color = "grey50", fontface = "italic", family = "Times")
}
direction_notes <- list(
  direction_arrow(x = -2, xend = -6),
  direction_caption(x = -4, label = "more liberal"),
  direction_arrow(x = 2, xend = 6),
  direction_caption(x = 4, label = "more conservative")
)
p1 <- p1 + direction_notes
p2 <- p2 + direction_notes
p3 <- p3 + direction_notes

## Combine into one graph
## Lay the three panels out in a single row with automatic panel labels, then
## attach the shared x-axis title with add_sub() and render it via ggdraw()
panels_by_pid <- plot_grid(p1, p2, p3, ncol = 3, labels = "AUTO", 
                           label_fontfamily = "Times")
shared_x_title <- 
  "Within-Subjects Difference in Ideology (Label-Free Measure - ANES Measure)"
ideo_change_pid <- panels_by_pid %>% 
  add_sub(shared_x_title, fontfamily = "Times", x = .52, y = 6, vjust = 4.5, 
          vpadding = grid::unit(0, "lines")) %>% 
  ggdraw()
ideo_change_pid

### Figure S16 ----
## Both models below use Democrats and Republicans (dem == 1 | gop == 1) in
## group == 2; that shared restriction is applied once here
## NOTE(review): each model range-checks only the *other* ideology measure;
## out-of-range values of the outcome itself are presumably already NA -- confirm
partisans_g2 <- df %>% 
  filter(group == 2, dem == 1 | gop == 1)

## Calculate the baseline ideological differences between Democrats and Republicans
## (outcome: ideo2; respondents whose ideo lies between 1 and 7)
lm_robust(ideo2 ~ dem, data = partisans_g2, 
          subset = ideo >= 1 & ideo <= 7)
# diff. = 2.12, SE = 0.10

## Calculate the ideological differences between Democrats and Republicans when 
## ideological labels are removed
## (outcome: ideo; respondents whose ideo2 lies between 1 and 7)
lm_robust(ideo ~ dem, data = partisans_g2, 
          subset = ideo2 >= 1 & ideo2 <= 7)
# diff. = 1.00, SE = 0.12

## Indicate respondents' partisanship
## Two labels only: every pid1/pid2* combination mapped to a party below is
## folded into "Democrats" or "Republicans"; rows matching neither stay NA
df2 <- df2 %>% 
  mutate(
    party = case_when(
      (pid1 == 1 & (pid2d == 1 | pid2d == 2)) | 
        ((pid1 == 3 | pid1 == 4) & pid2n == 2) ~ "Democrats",
      ((pid1 == 3 | pid1 == 4) & pid2n == 1) | 
        (pid1 == 2 & (pid2r == 2 | pid2r == 1)) ~ "Republicans"
    )
  )

## Respondents used for both distributions: partisans in df2 whose answers to
## both ideology items (ideo and ideo2) lie between 1 and 7
partisans_both_measures <- df2 %>% 
  filter(ideo >= 1 & ideo <= 7) %>% 
  filter(ideo2 >= 1 & ideo2 <= 7) %>% 
  filter(party == "Democrats" | party == "Republicans")

## Obtain the distribution of self-reported ideology among partisans in the ANES measure
## .groups = "drop_last" keeps the result grouped by party only, so that
## sum(count_ANES) below is the party total and freq_ANES is a within-party share;
## stating it explicitly also avoids summarize()'s regrouping message
ideo_by_party_ANES <- partisans_both_measures %>% 
  group_by(party, ideo2) %>% 
  summarize(count_ANES = n(), .groups = "drop_last") %>% 
  mutate(freq_ANES = formattable::percent(count_ANES / sum(count_ANES))) %>% 
  ungroup() %>% 
  rename(ideo = ideo2)

## Obtain the distribution of self-reported ideology among partisans in the label-free measure
## (same construction as above, grouping on ideo instead of ideo2)
ideo_by_party_nolabel <- partisans_both_measures %>% 
  group_by(party, ideo) %>% 
  summarize(count_nolabel = n(), .groups = "drop_last") %>% 
  mutate(freq_nolabel = formattable::percent(count_nolabel / sum(count_nolabel))) %>% 
  ungroup()

## Merge the results
## merge() defaults to an inner join: a party-by-ideology cell that occurs under
## only one of the two measures is dropped from the merged table
ideo_by_party <- merge(ideo_by_party_ANES, ideo_by_party_nolabel, by = c("ideo", "party"))

## Standard ANES measure in Subtract Labels condition
## Party means of ideo2, computed on the same respondents as ideo_by_party
## (both ideology items between 1 and 7, partisans only)
group_mean_temp1 <- df2 %>% 
  filter(ideo >= 1 & ideo <= 7) %>% 
  filter(ideo2 >= 1 & ideo2 <= 7) %>% 
  filter(party == "Democrats" | party == "Republicans") %>% 
  group_by(party) %>% 
  summarize(xvalue = mean(ideo2))

## Dodged bars: percentage of each party at each ideology value
p1 <- ggplot(data = ideo_by_party, aes(x = ideo, y = freq_ANES * 100, 
                                       color = party, fill = party)) +
  geom_bar(stat = "identity", position = position_dodge(.9), 
           color = "black", alpha = 0.7) +
  scale_color_manual(values = c("blue", "red")) +
  scale_fill_manual("Partisanship", values = c("blue", "red")) +
  xlab("Self-Reported Ideology") + 
  ylab("Percentage of Respondents (%)") +
  ggtitle("ANES Measure") +
  theme_bw() +
  theme(text = element_text(color = "black", size = 12, family = "Times"),
        axis.text = element_text(color = "black", family = "Times", size = 11), 
        legend.justification = c(1, 1),
        legend.position = c(1, 1),
        legend.background = element_rect(fill = "white", color = "black"),
        legend.box.background = element_rect(color = "black"),
        legend.key.size = unit(1.5, "line"),
        legend.key.height = unit(0, "cm")) +
  coord_cartesian(ylim = c(0, 50))

## Dashed vertical line at each party mean; the two colors are matched to the
## rows of group_mean_temp1 by position (Democrats first, then Republicans)
p1 <- p1 + 
  geom_vline(data = group_mean_temp1,
             aes(xintercept = xvalue), color = c("blue", "red"),
             linetype = "longdash", size = 0.5, show.legend = FALSE)

## Two arrows spanning the gap, plus the estimated difference
## annotate() draws each arrow once; geom_segment(aes(...)) with constant
## coordinates would inherit ideo_by_party and redraw the arrow for every row
## NOTE(review): the coordinates and the label text are hard-coded -- presumably
## rounded from group_mean_temp1 and the lm_robust() output; update if data change
p1 <- p1 + 
  annotate("segment", x = 4.116, xend = 3.06, y = 45, yend = 45, 
           arrow = arrow(length = unit(0.15, "cm")), 
           size = .5, color = "black") +
  annotate("segment", x = 4.116, xend = 5.17, y = 45, yend = 45, 
           arrow = arrow(length = unit(0.15, "cm")), 
           size = .5, color = "black") +
  annotate("text", x = 4.116, y = 48, label = "Diff. = 2.12\n(SE = 0.10)", 
           size = 3, color = "black", family = "Times")

## Label-free measure in Subtract Labels condition
## Party means of ideo, computed on the same respondents as ideo_by_party
## (both ideology items between 1 and 7, partisans only)
group_mean_temp2 <- df2 %>% 
  filter(ideo >= 1 & ideo <= 7) %>% 
  filter(ideo2 >= 1 & ideo2 <= 7) %>% 
  filter(party == "Democrats" | party == "Republicans") %>% 
  group_by(party) %>% 
  summarize(xvalue = mean(ideo))

## Dodged bars: percentage of each party at each ideology value
p2 <- ggplot(data = ideo_by_party, aes(x = ideo, y = freq_nolabel * 100, 
                                       color = party, fill = party)) +
  geom_bar(stat = "identity", position = position_dodge(.9), 
           color = "black", alpha = 0.7) +
  scale_color_manual(values = c("blue", "red")) +
  scale_fill_manual("Partisanship", values = c("blue", "red")) +
  xlab("Self-Reported Ideology") + 
  ylab("Percentage of Respondents (%)") +
  ggtitle("Label-Free Measure") +
  theme_bw() +
  theme(text = element_text(color = "black", size = 12, family = "Times"),
        axis.text = element_text(color = "black", family = "Times", size = 11), 
        legend.justification = c(1, 1),
        legend.position = c(1, 1),
        legend.background = element_rect(fill = "white", color = "black"),
        legend.box.background = element_rect(color = "black"),
        legend.key.size = unit(1.5, "line"),
        legend.key.height = unit(0, "cm")) +
  coord_cartesian(ylim = c(0, 50))

## Dashed vertical line at each party mean; the two colors are matched to the
## rows of group_mean_temp2 by position (Democrats first, then Republicans)
p2 <- p2 + 
  geom_vline(data = group_mean_temp2,
             aes(xintercept = xvalue), color = c("blue", "red"),
             linetype = "longdash", size = 0.5, show.legend = FALSE)

## Two arrows spanning the gap, plus the estimated difference
## annotate() draws each arrow once; geom_segment(aes(...)) with constant
## coordinates would inherit ideo_by_party and redraw the arrow for every row
## NOTE(review): the coordinates and the label text are hard-coded -- presumably
## rounded from group_mean_temp2 and the lm_robust() output; update if data change
p2 <- p2 + 
  annotate("segment", x = 4.167, xend = 3.65, y = 45, yend = 45, 
           arrow = arrow(length = unit(0.15, "cm")), 
           size = .5, color = "black") +
  annotate("segment", x = 4.167, xend = 4.69, y = 45, yend = 45, 
           arrow = arrow(length = unit(0.15, "cm")), 
           size = .5, color = "black") +
  annotate("text", x = 4.167, y = 48, label = "Diff. = 1.00\n(SE = 0.12)", 
           size = 3, color = "black", family = "Times")

## Combine into one graph
## Two panels side by side with automatic panel labels
ideo_close_gap <- plot_grid(
  plotlist = list(p1, p2), 
  ncol = 2, 
  labels = "AUTO", 
  label_fontfamily = "Times"
)
ideo_close_gap

### Figure S17 ----
## Test for equality of correlation coefficients (control vs. Add Definitions)
# Pair a cor.test() result with the standard error of its correlation.
#   x: an object with $estimate (the correlation r) and $parameter (the
#      degrees of freedom), as returned by cor.test()
# Returns a two-element list: `x` itself (unnamed), followed by
# Standard.Error = sqrt((1 - r^2) / df) as a plain, unnamed number.
cor.test.plus <- function(x) {
  r <- unname(x$estimate)
  dof <- unname(x$parameter)
  std_error <- sqrt((1 - r^2) / dof)
  list(x, Standard.Error = std_error)
}
# Each section prints the pid-ideology correlation(s) with their standard
# errors via cor.test.plus(), then compares two Pearson correlations with
# twocorci() and prints the estimated difference and its p-value.
# `res` is overwritten by every comparison.
# NOTE(review): twocorci() (bootcorci) is presumably resampling-based and no
# seed is set in this section -- confirm a seed is set earlier if exact
# p-values need to be reproducible.
# pid-ideo correlation in df0 and in df1
cor.test.plus(cor.test(df0$pid, df0$ideo))
cor.test.plus(cor.test(df1$pid, df1$ideo))
res <- twocorci(df0$pid, df0$ideo, df1$pid, df1$ideo, method = "pearson")
res$difference
res$p.value

## Test for equality of correlation coefficients (control vs. Subtract Labels)
# pid-ideo correlation in df2, compared against df0
cor.test.plus(cor.test(df2$pid, df2$ideo))
res <- twocorci(df0$pid, df0$ideo, df2$pid, df2$ideo, method = "pearson")
res$difference
res$p.value

## Test for equality of correlation coefficients (within-group for Subtract Labels)
# Compares pid-ideo2 with pid-ideo, both measured in df2
# NOTE(review): the two correlations come from the same data frame and share
# pid, so they are not independent samples; confirm that twocorci() is the
# intended test here (bootcorci may provide a dependent-correlation variant
# such as twocorci.ov -- verify)
cor.test.plus(cor.test(df2$pid, df2$ideo2))
res <- twocorci(df2$pid, df2$ideo2, df2$pid, df2$ideo, method = "pearson")
res$estimate1
res$estimate2
res$difference
res$p.value

## Test for equality of correlation coefficients (Add Definitions vs. Subtract Labels)
# Compares the pid-ideo correlation in df1 with the one in df2
res <- twocorci(df1$pid, df1$ideo, df2$pid, df2$ideo, method = "pearson")
res$estimate1
res$estimate2
res$difference
res$p.value

## Correlation between partisanship and ideology by experimental condition
# Standard ANES measure vs. measure with definitions
# Jittered responses (fixed jitter seed) with one least-squares line per
# condition, for respondents in group 0 or group 1; the r / r' labels are
# hard-coded text
pid_axis_labels <- c("Strong\nDem", "Weak\nDem", "Lean\nDem", "Ind.", 
                     "Lean\nGOP", "Weak\nGOP", "Strong\nGOP")
p1 <- df %>% 
  filter(group == 0 | group == 1) %>% 
  ggplot(aes(x = pid, y = ideo, color = group, linetype = group)) +
  geom_point(position = position_jitter(width = .15, height = .2, seed = 1234567),
             size = 1, alpha = .1, na.rm = TRUE) +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE, na.rm = TRUE) + 
  annotate("text", x = 6.5, y = 5.60, label = "r = 0.57", 
           size = 4, color = "black", fontface = "bold", family = "Times") +
  annotate("text", x = 3.5, y = 3.6, label = "r' = 0.54", 
           size = 4, color = "#0072B2", fontface = "bold", family = "Times") +
  scale_color_manual(values = c("black", "#0072B2")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_x_discrete(name = "", limits = pid_axis_labels) +
  labs(title = "Standard ANES Measure vs. Measure with Definitions", 
       y = "Self-Reported Ideology") +
  theme_bw() + 
  theme(text = element_text(color = "black", family = "Times", size = 12),
        axis.text = element_text(color = "black", family = "Times", size = 10),
        legend.position = "none")

# Standard ANES measure vs. label-free measure
# Jittered responses (fixed jitter seed) with one least-squares line per
# condition, for respondents in group 0 or group 2; the r / r' labels are
# hard-coded text
pid_axis_labels <- c("Strong\nDem", "Weak\nDem", "Lean\nDem", "Ind.", 
                     "Lean\nGOP", "Weak\nGOP", "Strong\nGOP")
p2 <- df %>% 
  filter(group == 0 | group == 2) %>% 
  ggplot(aes(x = pid, y = ideo, color = group, linetype = group)) +
  geom_point(position = position_jitter(width = .15, height = .2, seed = 1234567),
             size = 1, alpha = .1, na.rm = TRUE) +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE, na.rm = TRUE) + 
  annotate("text", x = 6.5, y = 5.60, label = "r = 0.57", 
           size = 4, color = "black", fontface = "bold", family = "Times") +
  annotate("text", x = 6.5, y = 4.40, label = "r' = 0.25", 
           size = 4, color = "#D55E00", fontface = "bold", family = "Times") + 
  scale_color_manual(values = c("black", "#D55E00")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_x_discrete(name = "", limits = pid_axis_labels) +
  labs(title = "Standard ANES Measure vs. Label-Free Measure", 
       y = "Self-Reported Ideology") +
  theme_bw() + 
  theme(text = element_text(color = "black", family = "Times", size = 12),
        axis.text = element_text(color = "black", family = "Times", size = 10),
        legend.position = "none")

## Combine into one graph
## Two panels side by side with automatic panel labels
sorting <- plot_grid(
  plotlist = list(p1, p2), 
  ncol = 2, 
  labels = "AUTO", 
  label_fontfamily = "Times"
)
sorting
